/*******************************************************************************
Project:		Wealth -- Smith, Zidar, and Zwick
Last modified: 	2021-06-29
Description:	This file cleans the SCF 2016 full file to create a microfile
				with detailed information on closely-held businesses. It closely
				follows the work in xpds_scfprivbiz20210512.do. See also DS note
				SCFBizAtTheTop20210628.pdf.
*******************************************************************************/

/*******************************************************************************
	(1) Load 2016 SCF full file and retrieve variables we want
*******************************************************************************/

clear
clear mata
clear matrix
set maxvar 6000

// Load the 2016 SCF full file
use "$rawdir/complete_data/p16i6.dta", clear

// The variables to retain are collected in named groups below and then kept
// in a single statement. Question wording is paraphrased next to each code.

// Identifiers and weight (X42001)
local idvars YY1 Y1 X42001

// X3103: Do you own or share ownership in any privately-held businesses?
// X3105: In how many privately-held businesses do you own or share ownership
//        in and have an active management role? (top-coded at 25)
local actcount X3103 X3105

// Actively-managed businesses 1 and 2 (X31nn = business 1, X32nn = business 2)
// X3111 X3211: How many people work in this business, including you, members
//              of your family, or anyone who is working without pay?
// X3119 X3219: Is [the business] a partnership, a sole-proprietorship, an LLC,
//              a subchapter S corporation, another type of corporation, or
//              something else?
// X3128 X3228: What percentage of the business do you (and your family living
//              here) own?
// X3172 X3272: What fraction of this do you personally own?
// X3129 X3229: What is the net worth of (your share of) this business?
// X3130 X3230: If you sold the business now, what would be the cost basis for
//              tax purposes of your share of the business?
// X3131 X3231: What were the gross sales of the business as a whole in 2015?
// X3132 X3232: What was the business's total pre-tax net income in 2015?
local actdetail X3111 X3211 X3119 X3219 X3128 X3228 X3172 X3272 X3129 X3229 X3130 X3230 X3131 X3231 X3132 X3232

// Remaining actively-managed businesses (beyond the first two)
// X3335: For the remaining businesses (2+) you own and actively manage, what
//        could you sell your share for?
// X3336: For these remaining businesses, if you sold these businesses now,
//        what would be the cost basis for tax purposes of your share?
// X3337: For these remaining businesses, what was the total net income you
//        received from these businesses in 2015?
// X3174: Do any of these businesses have fewer than 500 employees?
local actrest X3335 X3336 X3337 X3174

// X3402: In how many businesses do you own or share ownership where you do
//        not have an active management role?
local nonactcount X3402

// Non-actively-managed businesses by organizational form, always in the order
// LPs; other partnerships; LLCs; S-corporations; other corporations; other
// business.
// Are there any businesses of this form?
local nonactany X3407 X3411 X3451 X3415 X3419 X3427
// What could you sell your family's share for?
local nonactval X3408 X3412 X3452 X3416 X3420 X3428
// If you sold this business now, what would be the cost basis for tax purposes?
local nonactbasis X3409 X3413 X3453 X3417 X3421 X3429
// What was the total net income you and your family living here received from
// this business in 2015?
local nonactincm X3410 X3414 X3454 X3418 X3422 X3430

keep `idvars' `actcount' `actdetail' `actrest' `nonactcount' ///
	`nonactany' `nonactval' `nonactbasis' `nonactincm'

/*******************************************************************************
	(2) Reformat data, make simple calculations
*******************************************************************************/

	/***************************************************************************
		(2.1) Give variables informative names
	***************************************************************************/

// Ownership screen and count of actively-managed businesses
rename X3103 anyprivbiz
rename X3105 numactbus

// Actively-managed businesses 1 and 2: the raw names differ only in the digit
// after "X3", which is the business slot.
forvalues slot = 1/2 {
	rename X3`slot'11 actbus`slot'emp
	rename X3`slot'19 actbus`slot'orgform
	rename X3`slot'28 actbus`slot'ownshare
	rename X3`slot'72 actbus`slot'rownshare
	rename X3`slot'29 actbus`slot'sharemktval
	rename X3`slot'30 actbus`slot'sharecostbasis
	rename X3`slot'31 actbus`slot'totalsales
	rename X3`slot'32 actbus`slot'totalnetincm
}

// Remaining actively-managed businesses, reported as a group
rename X3335 actbus3plus_sharemktval
rename X3336 actbus3plus_sharecostbasis
rename X3337 actbus3plus_sharenetincm
rename X3174 actbus3plus_emp500

// Count of businesses without an active management role
rename X3402 numnonactbus

// Non-actively-managed businesses by organizational form: existence flags,
// then market value, cost basis and net income of the share.
rename (X3407 X3411 X3451 X3415 X3419 X3427) ///
	(anyLP anyotherpart anyLLC anyScorp anyOthCorp anyOther)

rename (X3408 X3412 X3452 X3416 X3420 X3428) ///
	(LPsharemktval otherpartsharemktval LLCsharemktval Scorpsharemktval ///
	 OthCorpsharemktval Othersharemktval)

rename (X3409 X3413 X3453 X3417 X3421 X3429) ///
	(LPsharecostbasis otherpartsharecostbasis LLCsharecostbasis ///
	 Scorpsharecostbasis OthCorpsharecostbasis Othersharecostbasis)

rename (X3410 X3414 X3454 X3418 X3422 X3430) ///
	(LPsharenetincm otherpartsharenetincm LLCsharenetincm Scorpsharenetincm ///
	 OthCorpsharenetincm Othersharenetincm)

	/***************************************************************************
		(2.2) Make variable for total businesses owned (this should not be more
			than 50, b/c both active and non-active total variables are top-
			coded at 25)
	***************************************************************************/

// Total number of businesses = actively managed + not actively managed.
gen numbus = numactbus + numnonactbus

// Sanity check on the largest total. Each component is top-coded at 25, so a
// total of exactly 50 is attainable and must be allowed (the bound is
// inclusive). summarize ignores missing values, so this checks only the
// non-missing totals.
qui summ numbus, meanonly
assert `r(max)' <= 50

	/***************************************************************************
		(2.3) Make ownership share variables contained within 0-1; at the moment
			they're in 0-10^4.
	***************************************************************************/

// Loops over every variable whose name ends in "ownshare".
foreach ownvar of varlist *ownshare {

	// Divide by 10^4 to move to the 0-1 scale; anything negative is floored at 0.
	quietly replace `ownvar' = max(`ownvar' / 1E4, 0)

	// Every value must now lie in the unit interval ...
	assert `ownvar' >= 0 & `ownvar' <= 1

	// ... and the largest reported share must be exactly 1.
	quietly summarize `ownvar', meanonly
	assert `r(max)' == 1
}

	/***************************************************************************
		(2.4) Recode variables, removing -1 code for ``Inapplicable'' or 
			``Nothing''
	***************************************************************************/

// Applies to all cost-basis, market-value, net-income and total-sales variables.
foreach amtvar of varlist *sharecostbasis *sharemktval *netincm *totalsales {

	// The -1 code is replaced with zero.
	replace `amtvar' = 0 if `amtvar' == -1

	// Net-income variables are exempt from the non-negativity check; all
	// other amounts must be non-negative after the recode.
	if strpos("`amtvar'", "netincm") == 0 {
		assert `amtvar' >= 0
	}
}

	/***************************************************************************
		(2.5) Use ownership share to get market value and cost basis of entire
			business for actively-managed businesses 1 and 2. Also, get PEU's
			share of net income and sales.
	***************************************************************************/

forvalues b = 1/2 {

	local own actbus`b'ownshare

	// Whole-business market value and cost basis: reported share amount divided
	// by the ownership share, set to zero when the ownership share is zero.
	gen actbus`b'totalmktval = ///
		cond(`own' != 0, actbus`b'sharemktval / `own', 0)

	gen actbus`b'totalcostbasis = ///
		cond(`own' != 0, actbus`b'sharecostbasis / `own', 0)

	// Share of sales and net income: business-wide amount times ownership share.
	gen actbus`b'sharesales = `own' * actbus`b'totalsales
	gen actbus`b'sharenetincm = `own' * actbus`b'totalnetincm

	// None of the four derived variables may be missing.
	foreach derived in totalmktval totalcostbasis sharesales sharenetincm {
		assert !missing(actbus`b'`derived')
	}
}

	/***************************************************************************
		(2.6) Recode business ownership Y/N variables as indicators
	***************************************************************************/

// Yes/No questions: recode 5 to 0 so that Yes = 1, No = 0.
recode anyprivbiz anyLP anyotherpart anyLLC anyScorp anyOthCorp anyOther (5 = 0) // Yes = 1, No = 0

// Indicators for owning at least 1, 2 or 3 actively-managed businesses.
// Stata treats a missing value as larger than any number, so an unguarded
// "numactbus >= k" would evaluate to 1 when the count is missing. The
// !missing() guard makes each indicator 1 only when the count is known to
// meet the threshold; results are unchanged for non-missing counts.
gen oneplusactbus = numactbus >= 1 & !missing(numactbus)
gen twoplusactbus = numactbus >= 2 & !missing(numactbus)
gen threeplusactbus = numactbus >= 3 & !missing(numactbus)

// Indicator for owning any business without an active management role
// (same missing-value guard as above).
gen anynonactbus = numnonactbus > 0 & !missing(numnonactbus)

	/***************************************************************************
		(2.7) Compute aggregates across non-actively managed business categories
	***************************************************************************/

// For each concept, add the six organizational-form amounts into one total.
foreach concept in mktval costbasis netincm {

	// Build the sum expression term by term, starting with LPs and appending
	// the remaining forms in the same order as the variable lists above.
	local total LPshare`concept'
	foreach form in otherpart LLC Scorp OthCorp Other {
		local total `total' + `form'share`concept'
	}

	gen nonactshares`concept' = `total'
}

	/***************************************************************************
		(2.8) Compute multiples: 
			- market value / revenues
			- market value / net income
			- market value / cost basis
		Some weirdos say that they have zero sales, net income, or cost basis; 
		top-censor at P99 to deal with this as a first pass. Also make concepts 
		that censor per Eric's build_industry_valuations code.
	***************************************************************************/

/* See build_industry_valuations in Eric's programs code, e.g. programs_20210225.do 
	in the syzzle repo. Eric says these numbers are based on ``observed multiples 
	in Compustat,'' which makes me nervous but alas. */
// Upper bounds applied to the "_cnsr" versions of each multiple, keyed by the
// denominator name.
local salescensor = 5
local costbasiscensor = 20
local netincmcensor = 50

foreach den in sales netincm costbasis {

	// Upper bound for this denominator
	local ceiling ``den'censor'

	// ---- Actively-managed businesses 1 and 2 ----
	forvalues b = 1/2 {

		local ratio actbus`b'mktval_`den'
		local base actbus`b'total`den'

		// Multiple is zero for households without a b-th active business.
		quietly gen `ratio' = cond(numactbus < `b', 0, actbus`b'totalmktval / `base')

		assert !missing(`ratio') if `base' != 0

		// Top-censor at the weighted P99 among owners, bottom-censor at 0.
		// Missing ratios compare greater than P99 and so are also set to P99.
		// The summarize must immediately precede the replace that reads r(p99).
		quietly summarize `ratio' if numactbus >= `b' [aw = X42001], detail
		quietly replace `ratio' = `r(p99)' if `ratio' > `r(p99)'
		quietly replace `ratio' = 0 if `ratio' < 0

		// Version capped at the fixed upper bound
		gen `ratio'_cnsr = min(`ratio', `ceiling')

	}

	// Sales are not available for active businesses 3+ or for non-actively
	// managed businesses, so nothing further is built for that denominator.
	if "`den'" == "sales" {
		continue
	}

	// ---- Active businesses 3+ (only share-level amounts are available) ----
	local ratio actbus3plus_mktval_`den'
	local base actbus3plus_share`den'

	quietly gen `ratio' = cond(numactbus < 3, 0, actbus3plus_sharemktval / `base')

	assert !missing(`ratio') if `base' != 0

	quietly summarize `ratio' if numactbus >= 3 [aw = X42001], detail
	quietly replace `ratio' = `r(p99)' if `ratio' > `r(p99)'
	quietly replace `ratio' = 0 if `ratio' < 0

	// NOTE(review): the capped variable uses the shorter "actbus3p_" prefix,
	// presumably because the full prefix would give a 33-character name for
	// costbasis, over the 32-character variable-name limit -- confirm.
	gen actbus3p_mktval_`den'_cnsr = min(`ratio', `ceiling')

	// ---- Non-actively managed businesses, aggregated over forms ----
	local ratio nonactmktval_`den'
	local base nonactshares`den'

	quietly gen `ratio' = cond(anynonactbus == 0, 0, nonactsharesmktval / `base')

	assert !missing(`ratio') if `base' != 0

	quietly summarize `ratio' if anynonactbus == 1 [aw = X42001], detail
	quietly replace `ratio' = `r(p99)' if `ratio' > `r(p99)'
	quietly replace `ratio' = 0 if `ratio' < 0

	gen `ratio'_cnsr = min(`ratio', `ceiling')

	// ---- Non-actively managed businesses, by organizational form ----
	// No capped version is created at this level.
	foreach form in LP otherpart LLC Scorp OthCorp Other {

		local ratio nonact`form'_mktval_`den'
		local base `form'share`den'

		quietly gen `ratio' = cond(any`form' != 1, 0, `form'sharemktval / `base')

		assert !missing(`ratio') if `base' != 0

		quietly summarize `ratio' if any`form' == 1 [aw = X42001], detail
		quietly replace `ratio' = `r(p99)' if `ratio' > `r(p99)'
		quietly replace `ratio' = 0 if `ratio' < 0
	}
}

/*******************************************************************************
	(3) Merge to preferred SCF microfile to get AGI and net worth variables; get
		TU-scaled ranks. The microfile itself is saved to dtadir in section (4).
*******************************************************************************/

// The merge key includes year, so add it here as a constant.
gen year = 2016

// Bring in preferred net worth and weights. The path is quoted, as in the
// initial use statement, so that a dtadir containing spaces does not break
// the command. assert(2 3) requires every observation in memory to find a
// match; keep(3) then retains matched observations only.
merge 1:1 year YY1 Y1 using "$dtadir/scf_revision.dta", ///
	keepusing(networth_pref wgt wgt1B) assert(2 3) keep(3) nogen

// The merged weight must equal the SCF weight divided by 5.
// NOTE(review): presumably 5 is the number of SCF implicates -- confirm.
assert wgt == X42001 / 5

// Project-defined command; writes the wealth rank to tu_wlthrank.
tu_rank_scf, rankvar(networth_pref) outname(tu_wlthrank)

// Top-fractile indicators. Stata treats missing as larger than any number, so
// an unguarded comparison would flag a household with a missing rank as
// top 1% and top 0.1%. The !missing() guard sets the indicators to 0 instead;
// results are unchanged for non-missing ranks.
gen top1wlth = tu_wlthrank > 0.99 & !missing(tu_wlthrank)
gen top01wlth = tu_wlthrank > 0.999 & !missing(tu_wlthrank)

// In the top 1% but not in the top 0.1%.
gen top1_01wlth = top1wlth == 1 & top01wlth == 0

/*******************************************************************************
	(4) Clean up and save
*******************************************************************************/

// year was only needed as a merge key.
drop year

// Put identifiers, weights, net worth, rank and top-fractile indicators first.
order YY1 Y1 wgt wgt1B networth_pref *rank top*

// The path is quoted, as in the initial use statement, so that a dtadir
// containing spaces does not break the command.
save "$dtadir/scf2016bizdetail.dta", replace